
library(ggplot2)
library(dplyr)

mediasources_mpas <- readRDS("./mediasources_mpas_insularity.rds")

# calculate insularity classes
im <- mean(mediasources_mpas$insularity)
isd <- sd(mediasources_mpas$insularity)

# plot histogram
ggplot(mediasources_mpas, aes(x = insularity)) + 
  ylim(c(0,23)) +
  xlim(c(0,1)) +
  geom_histogram(binwidth = 0.01, position = "stack", 
                 color = "white", fill = "grey20") +
  geom_vline(xintercept = c(im+isd, im, im-isd), linetype = "dashed") +
  xlab("Insularity") +
  ylab("Count") + 
  ggtitle("Histogram of News Sources Insularity") +
  theme(plot.title = element_text(hjust = 0.5)) +
  geom_label(aes(label = "None"), y = 22, x = 0.39) +
  geom_label(aes(label = "Low"), y = 22, x = 0.57) +
  geom_label(aes(label = "Moderate"), y = 22, x = 0.735) +
  geom_label(aes(label = "High"), y = 22, x = 0.92)

# descriptive statistics of insularity distribution
psych::describe(mediasources_mpas$insularity)

# news sources by insularity class
mediasources_mpas$insularity_class <- as.factor(mediasources_mpas$insularity_class)
mediasources_mpas$insularity_class <- ordered(mediasources_mpas$insularity_class, 
                                              levels = c("High", "Moderate", "Low", "None"))

tab_1 <- table(mediasources_mpas$insularity_class)
myprop <- round(prop.table(tab_1), 3)

myprop <- data.frame(addmargins(myprop, 1))
tab_1 <- data.frame(addmargins(tab_1, 1))

tab_1 <- data.frame("Insularity" = tab_1$Var1,
                    "n" = tab_1$Freq, 
                    "p" = (myprop$Freq)*100)
tab_1

# "none" and "low" vs. "medium" and "high" insularity
ins_classes <- table(mediasources_mpas$insularity_class)
round(prop.table(ins_classes)[1]+prop.table(ins_classes)[2],2)
round(prop.table(ins_classes)[3]+prop.table(ins_classes)[4],2)

# media sources by party
tab_2 <- mediasources_mpas %>% 
  group_by(adjudication) %>% 
  summarize(n = n()) %>%
  mutate(p = round(n/sum(n),3)*100) %>%
  arrange(-n)
tab_2

# subset parties with higher number of adjudicated news sources
mysubset <- subset(mediasources_mpas, 
                   adjudication == "PD" |
                     adjudication ==  "M5S" |
                     adjudication ==  "LN" | 
                     adjudication ==  "LeU")

# Cross-tabulation
mytab <- table(mysubset$adjudication, mysubset$insularity_class)
addmargins(mytab, 1:2)
mytab <- mytab[,c(3,2,1)] # reorder classes
prop <- prop.table(mytab, margin = 1)

# BarPlot
rownames(prop)[1] <- "LeU (N=49)"
rownames(prop)[2] <- "LN (N=215)"
rownames(prop)[3] <- "M5S (N=177)"
rownames(prop)[4] <- "PD (N=60)"

barplot(t(prop), main = "Insularity Class by MP-MPAS Adjudication",
        xlab = "MP-MPAS Adjudicated Party", ylab = "Proportions",
        col = c("grey40", "grey60", "grey80"),
        legend = c("High", "Moderate", "Low"),
        args.legend = list(fill = c("grey40", "grey60", "grey80"),
                           x = "bottomright"))

# chi squared test
chi_results <- chisq.test(mytab)
chi_results; sum(mytab)

# standardized residuals 
chi_results$stdres

p.value <- 0.001/12
z.critical <- qnorm(1 - (p.value/2))
cat("Z-score alpha 0.001 = ", z.critical, "\n") 
p.value <- 0.01/12
z.critical <- qnorm(1 - (p.value/2))
cat("Z-score alpha 0.01 = ", z.critical, "\n") 
p.value <- 0.05/12
z.critical <- qnorm(1 - (p.value/2))
cat("Z-score alpha 0.5 = ", z.critical, "\n")

round(chi_results$stdres,2)
